library(rstan)
## Warning: package 'rstan' was built under R version 4.3.3
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 4.3.3
## 
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(rstanarm)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.3.3
## This is rstanarm version 2.32.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
## 
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
## 
##     loo
library(bayesplot)
## Warning: package 'bayesplot' was built under R version 4.3.3
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
library(tidyr) # for pivot_longer
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:rstan':
## 
##     extract
library(dplyr) # for %>%
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(truncnorm) # for rnorm with minimum and maximum values
library(loo)
## Warning: package 'loo' was built under R version 4.3.3
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
## 
##     loo
library(lubridate) # for simplifying working with dates
## Warning: package 'lubridate' was built under R version 4.3.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(posterior) # for checking warnings of stan model
## Warning: package 'posterior' was built under R version 4.3.3
## This is posterior version 1.6.1
## 
## Attaching package: 'posterior'
## The following object is masked from 'package:bayesplot':
## 
##     rhat
## The following objects are masked from 'package:rstan':
## 
##     ess_bulk, ess_tail
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## The following objects are masked from 'package:base':
## 
##     %in%, match
# Custom nine-colour palette, one colour per nation: the built-in palettes do
# not separate nine groups clearly enough for the plots below.
custom_palette <- c(
  "#9F002E",  # dark red
  "#B23AEE",  # purple
  "#FF50FF",  # magenta
  "#FF7F00",  # orange
  "#FFB900",  # amber
  "#00EEEE",  # cyan
  "#4EEE94",  # light green
  "#458B00",  # dark green
  "#4876FF"   # blue
)

# Load the dataset
#install.packages("mlmRev")
library(mlmRev)
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 4.3.3
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Load the county-level mortality data shipped with mlmRev (see ?Mmmec for
# the full codebook; columns used below: nation, region, deaths, expected, uvb)
data("Mmmec")

# Check for missing values: every column is complete (all counts are 0 below)
colSums(is.na(Mmmec))
##   nation   region   county   deaths expected      uvb 
##        0        0        0        0        0        0
# Summarize statistics about deaths and uvb overall
summary(Mmmec$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    8.00   14.50   27.83   31.00  313.00
# 1st quantile = 8 means that 25% of the observations have less than 8 deaths
# 3rd quantile = 31 means that 75% of the observations have less than 31 deaths
# Total observed deaths per nation (used as a sanity check on the data)
deaths_by_country <- aggregate(deaths ~ nation, data = Mmmec, sum)
print(deaths_by_country)
##        nation deaths
## 1     Belgium    449
## 2   W.Germany   2949
## 3     Denmark    681
## 4      France   1495
## 5          UK   2179
## 6       Italy   1462
## 7     Ireland     67
## 8  Luxembourg     23
## 9 Netherlands    546
# UVB exposure is roughly centred on zero (mean ~ 0.0002), so it behaves like
# a centred covariate in the models below
summary(Mmmec$uvb)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -8.900200 -4.158400 -0.886400  0.000204  3.275525 13.359000
# Explorative analysis

# Some histograms of the distribution of deaths and expected deaths

# Only deaths (one bar per death count)
ggplot(Mmmec, aes(x = deaths)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  ggtitle("Distribution of Deaths")

# `filename` spelled out: `file =` only worked via partial argument matching
ggsave(filename = "images/distribution_deaths.pdf", width = 18, height = 10.5)

# Only expected deaths
ggplot(Mmmec, aes(x = expected)) +
  geom_histogram(binwidth = 1, fill = "yellow", color = "black") +
  ggtitle("Distribution of Expected Deaths")

ggsave(filename = "images/distribution_expecteddeaths.pdf", width = 18, height = 10.5)

# Both deaths and expected deaths
# Gather the data into a long format using pivot_longer
Mmmec_long <- Mmmec %>%
  pivot_longer(cols = c(deaths, expected), names_to = "type", values_to = "value")
# Create an overlapped (position="identity") histogram with transparency (alpha=0.6)
ggplot(Mmmec_long, aes(x = value, fill = type)) +
  geom_histogram(binwidth = 1, alpha = 0.6, position = "identity", color = "black") +
  ggtitle("Distribution of Deaths and Expected Deaths") +
  xlab("Deaths / Expected Deaths") +
  scale_fill_manual(values = c("deaths" = "red", "expected" = "yellow"), labels = c("Deaths", "Expected Deaths"))

ggsave(filename = "images/distribution_deaths_expected.pdf", width = 18, height = 10.5)

# Some scatter plots of deaths VS UVB exposure

# With a single regression line
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_smooth(method = "lm", color = "blue") +
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure") +
  geom_jitter()
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file="images/scatter_deaths_vs_uvb.pdf", width=9.3,height=7)
## `geom_smooth()` using formula = 'y ~ x'
# With a regression line for each nation
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_smooth(method = "lm", se = FALSE) +  # Adds a regression line for each nation
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure") +
  geom_jitter()
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file="images/scatter_deaths_vs_uvb_regression_by_nation.pdf", width=9.3,height=7)
## `geom_smooth()` using formula = 'y ~ x'
# Or dividing by nation using facet_wrap, so that it is less chaotic
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  facet_wrap(~ nation) +
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure by Nation") +
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file="images/scatter_deaths_vs_uvb_by_nation.pdf", width=8,height=7)
## `geom_smooth()` using formula = 'y ~ x'
# Boxplots of deaths by nation
ggplot(Mmmec, aes(x = nation, y = deaths)) +
  geom_boxplot(fill = custom_palette) +
  ggtitle("Deaths across counties in nations")

ggsave(file="images/boxplot_deaths_by_nation.pdf", width=8,height=7)


# Generalized linear mixed model with frequentist approach (in particular hierarchical approach)
# Poisson GLMM for death counts with the UVB slope as fixed effect and
# varying intercepts at the region and nation levels (regions nested in
# nations in the data, but modelled as crossed grouping factors here).
M1 <- glmer(deaths ~ uvb + (1 | region) + (1 | nation), Mmmec, poisson, offset = log(expected))
# (1 | k) includes varying intercepts for each k
# log(expected) is an offset term to adjust the model to account for the expected number of deaths
# (with a log link, this makes the linear predictor model log(deaths/expected),
# i.e. the log standardized mortality ratio)
summary(M1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: deaths ~ uvb + (1 | region) + (1 | nation)
##    Data: Mmmec
##  Offset: log(expected)
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    2198.7    2214.2   -1095.3    2190.7       350 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9440 -0.7788 -0.0071  0.6277  3.9102 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  region (Intercept) 0.04829  0.2198  
##  nation (Intercept) 0.13708  0.3702  
## Number of obs: 354, groups:  region, 78; nation, 9
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.06398    0.13351  -0.479   0.6318  
## uvb         -0.02822    0.01139  -2.478   0.0132 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## uvb 0.185
# Bayes approach relying on HMC sampling from the posterior distribution
# Same formula, family and offset as M1. No priors are specified, so the
# rstanarm defaults apply (the startup banner above warns these may change
# between versions). Sampling log below: 4 chains x 2000 iterations, the
# first 1000 of each being warmup.
# NOTE(review): no `seed` argument is set, so results are not exactly
# reproducible across runs — consider adding one.
M1.rstanarm <- stan_glmer(deaths ~ uvb + (1 | region) + (1 | nation), Mmmec, poisson, offset = log(expected))
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 6.4e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.64 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 4.498 seconds (Warm-up)
## Chain 1:                3.279 seconds (Sampling)
## Chain 1:                7.777 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 3.3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.33 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 4.536 seconds (Warm-up)
## Chain 2:                3.223 seconds (Sampling)
## Chain 2:                7.759 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 3.9e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 4.485 seconds (Warm-up)
## Chain 3:                2.986 seconds (Sampling)
## Chain 3:                7.471 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 2.9e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 3.937 seconds (Warm-up)
## Chain 4:                3.272 seconds (Sampling)
## Chain 4:                7.209 seconds (Total)
## Chain 4:
# Compact view of the fit: posterior medians and MAD-based standard errors
print(M1.rstanarm)
## stan_glmer
##  family:       poisson [log]
##  formula:      deaths ~ uvb + (1 | region) + (1 | nation)
##  observations: 354
## ------
##             Median MAD_SD
## (Intercept) -0.1    0.2  
## uvb          0.0    0.0  
## 
## Error terms:
##  Groups Name        Std.Dev.
##  region (Intercept) 0.23    
##  nation (Intercept) 0.48    
## Num. levels: region 78, nation 9 
## 
## ------
## * For help interpreting the printed output see ?print.stanreg
## * For info on the priors used see ?prior_summary.stanreg
# Full summary including every group-level intercept and the MCMC
# diagnostics (mcse, Rhat, n_eff) used below to judge convergence
summary(M1.rstanarm)
## 
## Model Info:
##  function:     stan_glmer
##  family:       poisson [log]
##  formula:      deaths ~ uvb + (1 | region) + (1 | nation)
##  algorithm:    sampling
##  sample:       4000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 354
##  groups:       region (78), nation (9)
## 
## Estimates:
##                                         mean   sd   10%   50%   90%
## (Intercept)                           -0.1    0.2 -0.3  -0.1   0.1 
## uvb                                    0.0    0.0  0.0   0.0   0.0 
## b[(Intercept) region:1]                0.4    0.1  0.2   0.4   0.6 
## b[(Intercept) region:2]                0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:3]               -0.5    0.1 -0.6  -0.4  -0.3 
## b[(Intercept) region:4]               -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:5]                0.1    0.1  0.0   0.1   0.2 
## b[(Intercept) region:6]                0.4    0.1  0.2   0.4   0.5 
## b[(Intercept) region:7]                0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:8]                0.3    0.1  0.1   0.3   0.4 
## b[(Intercept) region:9]               -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:10]              -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:11]              -0.3    0.1 -0.4  -0.3  -0.2 
## b[(Intercept) region:12]               0.0    0.1 -0.1   0.0   0.1 
## b[(Intercept) region:13]              -0.2    0.1 -0.3  -0.2   0.0 
## b[(Intercept) region:14]               0.1    0.1  0.0   0.1   0.2 
## b[(Intercept) region:15]               0.2    0.1  0.0   0.2   0.4 
## b[(Intercept) region:16]               0.1    0.1 -0.1   0.1   0.3 
## b[(Intercept) region:17]              -0.1    0.1 -0.3  -0.1   0.0 
## b[(Intercept) region:18]               0.1    0.1 -0.1   0.1   0.3 
## b[(Intercept) region:19]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:20]               0.1    0.1 -0.1   0.1   0.3 
## b[(Intercept) region:21]              -0.2    0.1 -0.4  -0.2   0.0 
## b[(Intercept) region:22]               0.0    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:23]               0.2    0.1  0.1   0.2   0.3 
## b[(Intercept) region:24]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:25]              -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:27]               0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:28]               0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:29]              -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:30]               0.1    0.1  0.0   0.1   0.3 
## b[(Intercept) region:31]               0.1    0.2 -0.1   0.1   0.3 
## b[(Intercept) region:32]              -0.2    0.1 -0.3  -0.2   0.0 
## b[(Intercept) region:33]               0.0    0.1 -0.2   0.0   0.1 
## b[(Intercept) region:34]              -0.2    0.1 -0.3  -0.2  -0.1 
## b[(Intercept) region:35]              -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:36]              -0.4    0.1 -0.6  -0.4  -0.2 
## b[(Intercept) region:37]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:38]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:39]               0.1    0.1  0.0   0.1   0.2 
## b[(Intercept) region:40]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:41]               0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:42]              -0.2    0.1 -0.3  -0.2  -0.1 
## b[(Intercept) region:43]              -0.2    0.1 -0.3  -0.2  -0.1 
## b[(Intercept) region:44]               0.3    0.1  0.2   0.3   0.4 
## b[(Intercept) region:45]               0.3    0.1  0.1   0.3   0.4 
## b[(Intercept) region:46]              -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:47]               0.0    0.1 -0.1   0.0   0.1 
## b[(Intercept) region:48]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:49]              -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:50]              -0.2    0.1 -0.4  -0.2  -0.1 
## b[(Intercept) region:51]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:52]              -0.1    0.2 -0.3  -0.1   0.1 
## b[(Intercept) region:53]              -0.4    0.2 -0.6  -0.4  -0.2 
## b[(Intercept) region:54]              -0.6    0.1 -0.7  -0.6  -0.4 
## b[(Intercept) region:55]               0.2    0.1  0.1   0.2   0.3 
## b[(Intercept) region:56]               0.4    0.1  0.2   0.3   0.5 
## b[(Intercept) region:57]               0.1    0.1  0.0   0.1   0.3 
## b[(Intercept) region:58]               0.2    0.1  0.0   0.2   0.4 
## b[(Intercept) region:59]               0.0    0.1 -0.1   0.0   0.1 
## b[(Intercept) region:60]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:61]               0.0    0.2 -0.2   0.0   0.2 
## b[(Intercept) region:62]               0.3    0.1  0.1   0.3   0.4 
## b[(Intercept) region:63]              -0.2    0.1 -0.4  -0.2  -0.1 
## b[(Intercept) region:64]              -0.1    0.1 -0.3  -0.1   0.1 
## b[(Intercept) region:65]              -0.2    0.1 -0.4  -0.2   0.0 
## b[(Intercept) region:66]               0.2    0.1  0.0   0.2   0.3 
## b[(Intercept) region:67]               0.1    0.2 -0.1   0.1   0.3 
## b[(Intercept) region:68]               0.0    0.2 -0.2   0.0   0.2 
## b[(Intercept) region:69]               0.1    0.2 -0.1   0.1   0.4 
## b[(Intercept) region:70]               0.1    0.1  0.0   0.1   0.3 
## b[(Intercept) region:71]              -0.1    0.2 -0.4  -0.1   0.1 
## b[(Intercept) region:72]               0.0    0.2 -0.2   0.0   0.2 
## b[(Intercept) region:73]               0.0    0.2 -0.3   0.0   0.2 
## b[(Intercept) region:74]               0.0    0.2 -0.3   0.0   0.3 
## b[(Intercept) region:75]               0.0    0.2 -0.3   0.0   0.3 
## b[(Intercept) region:76]              -0.1    0.1 -0.3  -0.1   0.1 
## b[(Intercept) region:77]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:78]               0.2    0.1  0.1   0.2   0.4 
## b[(Intercept) region:79]              -0.1    0.1 -0.3  -0.1   0.0 
## b[(Intercept) nation:Belgium]         -0.1    0.2 -0.3  -0.1   0.2 
## b[(Intercept) nation:W.Germany]        0.5    0.2  0.3   0.5   0.7 
## b[(Intercept) nation:Denmark]          0.6    0.2  0.4   0.6   0.9 
## b[(Intercept) nation:France]          -0.5    0.2 -0.7  -0.5  -0.3 
## b[(Intercept) nation:UK]              -0.1    0.2 -0.3  -0.1   0.1 
## b[(Intercept) nation:Italy]            0.0    0.2 -0.3   0.0   0.2 
## b[(Intercept) nation:Ireland]         -0.5    0.2 -0.8  -0.5  -0.2 
## b[(Intercept) nation:Luxembourg]       0.0    0.3 -0.3   0.0   0.4 
## b[(Intercept) nation:Netherlands]      0.1    0.2 -0.2   0.1   0.3 
## Sigma[region:(Intercept),(Intercept)]  0.1    0.0  0.0   0.1   0.1 
## Sigma[nation:(Intercept),(Intercept)]  0.2    0.2  0.1   0.2   0.4 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 27.8    0.4 27.3  27.8  28.4 
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                                       mcse Rhat n_eff
## (Intercept)                           0.0  1.0  1228 
## uvb                                   0.0  1.0  1768 
## b[(Intercept) region:1]               0.0  1.0  3322 
## b[(Intercept) region:2]               0.0  1.0  3000 
## b[(Intercept) region:3]               0.0  1.0  3220 
## b[(Intercept) region:4]               0.0  1.0  1539 
## b[(Intercept) region:5]               0.0  1.0  1624 
## b[(Intercept) region:6]               0.0  1.0  2127 
## b[(Intercept) region:7]               0.0  1.0  4507 
## b[(Intercept) region:8]               0.0  1.0  2435 
## b[(Intercept) region:9]               0.0  1.0  1802 
## b[(Intercept) region:10]              0.0  1.0  1556 
## b[(Intercept) region:11]              0.0  1.0  1481 
## b[(Intercept) region:12]              0.0  1.0  1963 
## b[(Intercept) region:13]              0.0  1.0  3509 
## b[(Intercept) region:14]              0.0  1.0  2059 
## b[(Intercept) region:15]              0.0  1.0  2352 
## b[(Intercept) region:16]              0.0  1.0  2604 
## b[(Intercept) region:17]              0.0  1.0  2493 
## b[(Intercept) region:18]              0.0  1.0  4984 
## b[(Intercept) region:19]              0.0  1.0  4412 
## b[(Intercept) region:20]              0.0  1.0  5063 
## b[(Intercept) region:21]              0.0  1.0  5680 
## b[(Intercept) region:22]              0.0  1.0  5168 
## b[(Intercept) region:23]              0.0  1.0  3901 
## b[(Intercept) region:24]              0.0  1.0  4665 
## b[(Intercept) region:25]              0.0  1.0  6298 
## b[(Intercept) region:27]              0.0  1.0  5785 
## b[(Intercept) region:28]              0.0  1.0  4679 
## b[(Intercept) region:29]              0.0  1.0  2712 
## b[(Intercept) region:30]              0.0  1.0  3792 
## b[(Intercept) region:31]              0.0  1.0  6634 
## b[(Intercept) region:32]              0.0  1.0  4631 
## b[(Intercept) region:33]              0.0  1.0  4573 
## b[(Intercept) region:34]              0.0  1.0  3733 
## b[(Intercept) region:35]              0.0  1.0  4532 
## b[(Intercept) region:36]              0.0  1.0  5014 
## b[(Intercept) region:37]              0.0  1.0  5474 
## b[(Intercept) region:38]              0.0  1.0  2830 
## b[(Intercept) region:39]              0.0  1.0  3415 
## b[(Intercept) region:40]              0.0  1.0  3186 
## b[(Intercept) region:41]              0.0  1.0  2591 
## b[(Intercept) region:42]              0.0  1.0  3156 
## b[(Intercept) region:43]              0.0  1.0  2312 
## b[(Intercept) region:44]              0.0  1.0  1589 
## b[(Intercept) region:45]              0.0  1.0  2064 
## b[(Intercept) region:46]              0.0  1.0  2252 
## b[(Intercept) region:47]              0.0  1.0  2416 
## b[(Intercept) region:48]              0.0  1.0  2660 
## b[(Intercept) region:49]              0.0  1.0  2294 
## b[(Intercept) region:50]              0.0  1.0  4753 
## b[(Intercept) region:51]              0.0  1.0  6778 
## b[(Intercept) region:52]              0.0  1.0  5489 
## b[(Intercept) region:53]              0.0  1.0  4433 
## b[(Intercept) region:54]              0.0  1.0  3852 
## b[(Intercept) region:55]              0.0  1.0  3255 
## b[(Intercept) region:56]              0.0  1.0  3821 
## b[(Intercept) region:57]              0.0  1.0  3798 
## b[(Intercept) region:58]              0.0  1.0  4444 
## b[(Intercept) region:59]              0.0  1.0  2107 
## b[(Intercept) region:60]              0.0  1.0  5193 
## b[(Intercept) region:61]              0.0  1.0  7668 
## b[(Intercept) region:62]              0.0  1.0  2466 
## b[(Intercept) region:63]              0.0  1.0  3849 
## b[(Intercept) region:64]              0.0  1.0  5345 
## b[(Intercept) region:65]              0.0  1.0  3294 
## b[(Intercept) region:66]              0.0  1.0  3960 
## b[(Intercept) region:67]              0.0  1.0  5081 
## b[(Intercept) region:68]              0.0  1.0  5793 
## b[(Intercept) region:69]              0.0  1.0  5740 
## b[(Intercept) region:70]              0.0  1.0  2765 
## b[(Intercept) region:71]              0.0  1.0  5578 
## b[(Intercept) region:72]              0.0  1.0  4949 
## b[(Intercept) region:73]              0.0  1.0  5335 
## b[(Intercept) region:74]              0.0  1.0  7432 
## b[(Intercept) region:75]              0.0  1.0  4392 
## b[(Intercept) region:76]              0.0  1.0  3620 
## b[(Intercept) region:77]              0.0  1.0  3155 
## b[(Intercept) region:78]              0.0  1.0  2568 
## b[(Intercept) region:79]              0.0  1.0  3060 
## b[(Intercept) nation:Belgium]         0.0  1.0  1643 
## b[(Intercept) nation:W.Germany]       0.0  1.0  1340 
## b[(Intercept) nation:Denmark]         0.0  1.0  1539 
## b[(Intercept) nation:France]          0.0  1.0  1297 
## b[(Intercept) nation:UK]              0.0  1.0  1230 
## b[(Intercept) nation:Italy]           0.0  1.0  1291 
## b[(Intercept) nation:Ireland]         0.0  1.0  1890 
## b[(Intercept) nation:Luxembourg]      0.0  1.0  2643 
## b[(Intercept) nation:Netherlands]     0.0  1.0  1535 
## Sigma[region:(Intercept),(Intercept)] 0.0  1.0  1367 
## Sigma[nation:(Intercept),(Intercept)] 0.0  1.0  1746 
## mean_PPD                              0.0  1.0  3915 
## log-posterior                         0.3  1.0   914 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Extract Leave-One-Out Cross-Validation
# PSIS-LOO estimate of out-of-sample predictive accuracy (elpd_loo); the
# warning below flags observations whose importance-sampling estimate is
# unreliable (Pareto k > 0.7)
loo_M1rs <- loo(M1.rstanarm)
## Warning: Found 14 observations with a pareto_k > 0.7. With this many problematic observations we recommend calling 'kfold' with argument 'K=10' to perform 10-fold cross-validation rather than LOO.
print(loo_M1rs)
## 
## Computed from 4000 by 354 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -1069.3 25.7
## p_loo        77.4  8.5
## looic      2138.6 51.3
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.4, 2.1]).
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. ESS
## (-Inf, 0.7]   (good)     340   96.0%   190     
##    (0.7, 1]   (bad)       10    2.8%   <NA>    
##    (1, Inf)   (very bad)   4    1.1%   <NA>    
## See help('pareto-k-diagnostic') for details.
# Using 10-fold CV as suggested by warning due to some observations with too high pareto_k,
# there is a warning about 1 divergent transition after warmup
#kfold_result <- kfold(M1.rstanarm, K = 10)
# But since Rhat=1 and ESS (n_eff)>1000 for every parameter, the resulting posterior is often good enough to move forward


# Posterior predictive checks
# Draw replicated datasets from the posterior predictive distribution:
# one row per posterior draw, one column per observation.
y_rep <- posterior_predict(M1.rstanarm)

# densities 
# pp_check() default overlays the observed outcome density on replicated densities
pp_check(M1.rstanarm)

# ggsave() with no plot argument saves the last ggplot displayed (the pp_check above)
ggsave(file="images/densities_M1rstanarm.pdf", width=8,height=7)

# Plot the credible intervals
# Build parse()-able y-axis labels for the mcmc_intervals plot:
# 2 fixed-effect labels, 78 region labels (region 26 is absent from the data),
# 9 nation labels and 2 group-level standard deviations (89 labels in total).
# Vectorized construction replaces the original pair of index-shifted loops
# that grew alpha_names one element at a time.
beta_names <- c("beta^uvb", "gl.intercept")
region_ids <- setdiff(1:79, 26)                      # region 26 is missing
nation_names <- c("Belgium", "WG", "Denmark", "France", "UK",
                  "Italy", "Ireland", "Luxembourg", "Netherlands")
alpha_names <- c(paste0("region[", region_ids, "]"),
                 nation_names,
                 "sigma[region]",                    # parsed later as plotmath subscripts
                 "sigma[nation]")
# Posterior draws as a matrix: one row per draw, one column per parameter
posterior_M1 <- as.matrix(M1.rstanarm)
mcmc_intervals(posterior_M1, regex_pars=c( "uvb",
                                           "(Intercept)", "b"))+
  xaxis_text(on =TRUE, size=rel(1.9))+
  yaxis_text(on =TRUE, size=rel(1.4))+
  # replace tick labels with the parsed plotmath labels built above;
  # mcmc_intervals already sets a y scale, hence the "replace existing scale" message
  scale_y_discrete(labels = ((parse(text= c(beta_names, alpha_names)))))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggsave(file="images/logistic_credible_intervals.pdf", width=5, height=length(alpha_names) * 0.25)

# Plot the posterior marginal densities along with 50% intervals for the ‘fixed-effects’ uvb
# mcmc_areas shows the full marginal density with the central interval shaded
mcmc_areas(posterior_M1, regex_pars = c("uvb"))+
  xaxis_text(on =TRUE, size=rel(1.9))+
  yaxis_text(on =TRUE, size=rel(1.4))

ggsave(file = "images/logistic_fixed_effects.pdf", width=8, height=7)

# Plot the random effects (posterior mean +- s.e.) for the region-level
# intercepts, ordered by posterior mean, with region labels alternating
# above/below the interval to reduce overlap.

# Helper hoisted out of the loop (the original redefined it every iteration)
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
  abs(x - round(x)) < tol
}

# Sort once and reuse both the sorted values and the permutation
ord_info <- sort(coef(M1.rstanarm)$region[, 1], index.return = TRUE)
int_ord <- ord_info$x
ord <- ord_info$ix
region.abbr <- levels(Mmmec$region)
region.abbr.ord <- region.abbr[ord]
# NOTE(review): assumes the leading entries of $ses line up with the rows of
# coef()$region — confirm against the model's parameter ordering
se_ord <- M1.rstanarm$ses[ord]

par(xaxt="n", mfrow=c(1,1), mar = c(5,2,2,1))
plot(seq_along(int_ord), int_ord, ylim=c(-1.4,1.4), pch=19, bg=2, xlab="Regions", 
      ylab="Intercepts",  cex.main=1.9, cex.lab=1.9)
for (h in seq_along(int_ord)) {
  segments(h, int_ord[h]-se_ord[h], h, int_ord[h]+se_ord[h], col="red")
  if (is.wholenumber(h/2)) {
    text(h, int_ord[h]+se_ord[h]+0.1, region.abbr.ord[h], cex=1.1)
  } else {
    text(h, int_ord[h]-se_ord[h]-0.1, region.abbr.ord[h], cex=1.1)
  }
}

# BUG FIX: ggsave() saves the last *ggplot*, not the base-graphics plot drawn
# above, so the original call silently wrote the previous ggplot to this file.
# dev.copy2pdf() copies the current base-graphics device instead.
dev.copy2pdf(file="images/random_effects_log.pdf", height=7, width=length(int_ord) * 0.4)

# empirical distribution function
# Overlay the observed ECDF on ECDFs of 200 replicated datasets
ppc_ecdf_overlay(y = M1.rstanarm$y, y_rep[1:200,])+
  xaxis_text(on =TRUE, size=22)+
  legend_text(size=rel(4))

ggsave(file="images/ecdf_M1.rstanarm.pdf", width=8, height=7)

# proportion of zero
# Test statistic: share of zero counts, compared between y and each y_rep row
prop_zero <- function(x) mean(x == 0)
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat = "prop_zero")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  legend_text(size=rel(4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/proportion_zero_M1.rstanarm.pdf", width=8, height=7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# statistics
# Posterior predictive checks on summary statistics: compare mean, sd, median
# and max of the observed y with their distributions across y_rep draws
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="mean")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/mean_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="sd")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/sd_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="median")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/median_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="max")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/max_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# standardized residuals
# Poisson-standardized residuals: (observed - predicted mean) / sqrt(predicted mean);
# points outside +-2 suggest observations poorly captured by the model.
mean_y_rep <- colMeans(y_rep)
std_resid <- (M1.rstanarm$y - mean_y_rep) / sqrt(mean_y_rep)
# qplot() is deprecated since ggplot2 3.4.0 — use the equivalent ggplot() call
ggplot(data.frame(mean_y_rep = mean_y_rep, std_resid = std_resid),
       aes(x = mean_y_rep, y = std_resid)) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x="Mean of y_rep", y= "Standardized residuals")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text(size=16),
        axis.title.y = element_text(size=16))

ggsave(file="images/standardized_residuals_M1.rstanarm.pdf", width =8, height =7)

# predictive intervals
# Pointwise posterior predictive intervals for each observation
# (no x supplied, so the x-axis is the observation index)
ppc_intervals(
  y = M1.rstanarm$y, 
  yrep = y_rep) + 
  labs(x = "Deaths", y = "Expected deaths")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text(size=20),
        axis.title.y = element_text(size=20))

ggsave(file="images/predictive_intervals_M1.rstanarm.pdf", width =8, height =7)



# Stan model of model A of the paper
# It is a variance components model with UVBI included in the fixed part of the model and
# random terms s_k, u_jk, e_ijk associated respectively with the intercept at level 3, 2, 1
# s_k ~ normal(0, sigma_s)
# u_jk ~ normal(0, sigma_u)

# Create UVB index as in Table III of the paper
# NOTE(review): assumes the summary values below are listed in the same order as
# unique(Mmmec$nation) — confirm against Table III of the paper.
uvbi_params <- data.frame(
  nation = unique(Mmmec$nation),  # redundant c() wrapper removed
  mean_UVBI = c(12.70, 12.79, 9.96, 17.18, 10.91, 21.45, 10.54, 13.26, 11.40),
  sd_UVBI = c(0.29, 1.35, 0.38, 2.59, 1.50, 3.51, 0.60, 0.05, 0.47),
  min_UVBI = c(12.17, 10.45, 9.47, 12.92, 6.69, 16.83, 9.64, 13.22, 10.62),
  max_UVBI = c(13.10, 15.15, 10.49, 23.24, 13.46, 28.95, 11.70, 13.31, 11.94)
)
# Join UVBI data to the dataframe Mmmec
Mmmec2 <- left_join(Mmmec, uvbi_params, by = "nation")
# Generate UVBI values for each county from mean_UVBI and sd_UVBI, respecting min_UVBI and max_UVBI.
# truncnorm:: is namespaced explicitly — the package is never attached with library() here.
# NOTE(review): no set.seed() before this draw, so the simulated UVBI values are
# not reproducible across runs.
Mmmec2 <- Mmmec2 %>%
  mutate(UVBI = truncnorm::rtruncnorm(n(), a = min_UVBI, b = max_UVBI,
                                      mean = mean_UVBI, sd = sd_UVBI))
# Prepare data for stan model
# Group index vectors k (nation) and j (region) are built from factor codes,
# so they are consecutive integers 1..K and 1..J as Stan expects
stan_data <- list(
  N = nrow(Mmmec2),
  deaths = Mmmec2$deaths,
  expected = Mmmec2$expected,
  K = length(unique(Mmmec2$nation)),
  J = length(unique(Mmmec2$region)),
  k = as.integer(as.factor(Mmmec2$nation)),
  j = as.integer(as.factor(Mmmec2$region)),
  UVBI = Mmmec2$UVBI
)
# Compile once, then sample (4 chains by default); iter=4000 with half used as warmup
comp_model_A <- stan_model('poisson_regression.stan')
fit_model_A <- sampling(comp_model_A, data = stan_data, seed = 123, iter=4000)
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 8.2e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.82 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 1: Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 1: Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 1: Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 1: Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 1: Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 1: Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 1: Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 1: Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 1: Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 1: Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 1: Iteration: 4000 / 4000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 6.338 seconds (Warm-up)
## Chain 1:                5.709 seconds (Sampling)
## Chain 1:                12.047 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 2.9e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 2: Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 2: Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 2: Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 2: Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 2: Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 2: Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 2: Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 2: Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 2: Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 2: Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 2: Iteration: 4000 / 4000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 6.807 seconds (Warm-up)
## Chain 2:                6.532 seconds (Sampling)
## Chain 2:                13.339 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 2.8e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 3: Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 3: Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 3: Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 3: Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 3: Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 3: Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 3: Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 3: Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 3: Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 3: Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 3: Iteration: 4000 / 4000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 6.423 seconds (Warm-up)
## Chain 3:                6.035 seconds (Sampling)
## Chain 3:                12.458 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 2.7e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 4: Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 4: Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 4: Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 4: Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 4: Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 4: Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 4: Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 4: Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 4: Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 4: Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 4: Iteration: 4000 / 4000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 5.654 seconds (Warm-up)
## Chain 4:                6.049 seconds (Sampling)
## Chain 4:                11.703 seconds (Total)
## Chain 4:
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
# Posterior summaries for the fixed effects and the three variance components
print(fit_model_A, pars = c('beta0','beta1','sigma_s','sigma_u','sigma_e'))
## Inference for Stan model: anon_model.
## 4 chains, each with iter=4000; warmup=2000; thin=1; 
## post-warmup draws per chain=2000, total post-warmup draws=8000.
## 
##          mean se_mean   sd  2.5%   25%   50%  75% 97.5% n_eff Rhat
## beta0   -0.07       0 0.21 -0.47 -0.21 -0.07 0.07  0.33  1911 1.00
## beta1    0.00       0 0.01 -0.01  0.00  0.00 0.01  0.02  6244 1.00
## sigma_s  0.50       0 0.16  0.28  0.40  0.47 0.58  0.90  4427 1.00
## sigma_u  0.23       0 0.03  0.18  0.21  0.23 0.25  0.29  2729 1.00
## sigma_e  0.12       0 0.02  0.08  0.10  0.12 0.13  0.16   287 1.01
## 
## Samples were drawn using NUTS(diag_e) at Sun Jun 15 18:53:45 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Replicated datasets from the Stan model's generated quantities block
y_rep_model_A <- as.matrix(fit_model_A, pars = "y_rep")

# Checks due to warnings
# HMC sampler diagnostics: divergences, tree-depth saturation, and E-BFMI

check_divergences(fit_model_A)
## 0 of 8000 iterations ended with a divergence.
check_treedepth(fit_model_A)
## 0 of 8000 iterations saturated the maximum tree depth of 10.
check_energy(fit_model_A)
## E-BFMI indicated possible pathological behavior:
##   Chain 1: E-BFMI = 0.146
##   Chain 2: E-BFMI = 0.140
##   Chain 3: E-BFMI = 0.142
##   Chain 4: E-BFMI = 0.104
## E-BFMI below 0.2 indicates you may need to reparameterize your model.
# Bayesian Fraction of Missing Information is in fact low
# but reparametrizing the model would make it different from the paper's

# The warning about bulk and tail ESS too low disappears with 8000 iterations instead of 4000
# but since posterior predictive checks do not improve significantly, it is unnecessary to use 8000
# posterior:: is namespaced explicitly — the posterior package is never attached with library() here.
ess <- posterior::summarise_draws(posterior::as_draws_array(fit_model_A))
# Select diagnostic columns by name rather than by position (robust if
# summarise_draws() ever reorders its output columns)
print(ess[1:5, c("variable", "rhat", "ess_bulk", "ess_tail")])
## # A tibble: 5 × 4
##   variable  rhat ess_bulk ess_tail
##   <chr>    <dbl>    <dbl>    <dbl>
## 1 beta0     1.00    1992.    2630.
## 2 beta1     1.00    6337.    5855.
## 3 sigma_s   1.00    5310.    4764.
## 4 sigma_u   1.00    2671.    5089.
## 5 sigma_e   1.01     287.     420.
# For sigma_e with iter=8000: ess_bulk=547, ess_tail=927, Rhat=1.00


# Posterior predictive checks

# Traceplot of the 4 chains to see if they mix well: sigma_e is in fact not as good as the others
# rstan::traceplot returns a ggplot, so the ggsave() below saves it correctly
traceplot(fit_model_A, pars = c('beta0', 'beta1', 'sigma_s', 'sigma_u', 'sigma_e'))

ggsave(file="images/traceplot.pdf", width=8, height=7)

# densities 
# Overlay the observed death-count density on densities of 200 replicated datasets
ppc_dens_overlay(y = stan_data$deaths, y_rep_model_A[1:200,])+
  xaxis_text(on =TRUE, size=22)+
  legend_text(size=rel(4))

ggsave(file="images/densities_model_A.pdf", width=8, height=7)

# empirical distribution function
ppc_ecdf_overlay(y = stan_data$deaths, y_rep_model_A[1:200,])+
  xaxis_text(on =TRUE, size=22)+
  legend_text(size=rel(4))

ggsave(file="images/ecdf_model_A.pdf", width=8, height=7)

# proportion of zero
# (prop_zero is already defined above for the rstanarm model; it is redefined
# identically here so this section is self-contained)
prop_zero <- function(x) mean(x == 0)
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat = "prop_zero")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  legend_text(size=rel(4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/proportion_zero_model_A.pdf", width=8, height=7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# statistics
# Posterior predictive checks on summary statistics (mean, sd, median, max)
# for the Stan model A replications
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="mean")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/mean_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="sd")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/sd_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="median")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/median_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="max")+
  xaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text( size=22))+
  legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/max_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# standardized residuals
# Poisson-standardized residuals for model A: (observed - predicted mean) / sqrt(predicted mean)
mean_y_rep_model_A <- colMeans(y_rep_model_A)
std_resid <- (stan_data$deaths - mean_y_rep_model_A) / sqrt(mean_y_rep_model_A)
# qplot() is deprecated since ggplot2 3.4.0 — use the equivalent ggplot() call
ggplot(data.frame(mean_y_rep_model_A = mean_y_rep_model_A, std_resid = std_resid),
       aes(x = mean_y_rep_model_A, y = std_resid)) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x="Mean of y_rep", y= "Standardized residuals")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text(size=16),
        axis.title.y = element_text(size=16))

ggsave(file="images/standardized_residuals_model_A.pdf", width =8, height =7)

# predictive intervals
# BUG FIX: the original passed x = stan_data$uvb, but stan_data has no element
# named 'uvb' (the covariate is stored as 'UVBI'), so x was silently NULL and
# the plot fell back to the observation index. Axis labels updated to match.
ppc_intervals(
  y = stan_data$deaths, 
  yrep = y_rep_model_A,
  x = stan_data$UVBI
) + 
  labs(x = "UVBI", y = "Deaths")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text(size=20),
        axis.title.y = element_text(size=20))

ggsave(file="images/predictive_intervals_model_A.pdf", width =8, height =7)

# Extract Leave-One-Out Cross-Validation
# Keep chains separate so relative efficiencies (r_eff) can be computed;
# without r_eff, loo() assumes independent draws (r_eff = 1) and understates
# the Monte Carlo SE — the original transcript below explicitly notes this.
# loo:: is namespaced since only the loo() generic is re-exported by rstanarm.
log_lik_A <- loo::extract_log_lik(fit_model_A, merge_chains = FALSE)
r_eff_A <- loo::relative_eff(exp(log_lik_A))
loo_A <- loo::loo(log_lik_A, r_eff = r_eff_A)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
print(loo_A)
# NOTE: the transcript below is from the original run with merged chains
# (r_eff = 1); point estimates are unchanged but the MCSE/ESS lines will
# differ once r_eff is supplied.
## 
## Computed from 8000 by 354 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -1054.2 21.3
## p_loo       112.6  9.5
## looic      2108.3 42.6
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume independent draws (r_eff=1).
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. ESS
## (-Inf, 0.7]   (good)     317   89.5%   248     
##    (0.7, 1]   (bad)       35    9.9%   <NA>    
##    (1, Inf)   (very bad)   2    0.6%   <NA>    
## See help('pareto-k-diagnostic') for details.